from torchvision import datasets
from torch.utils.data import DataLoader
from torch import optim
from torchvision import transforms
import torch
import matplotlib.pyplot as plt
from torch.utils.data import random_split
import random
from skimage.transform import resize
Explainable AI¶
In this assignment I explore explainable AI, starting with small-scale experiments on MNIST using a NumPy-based model. The goal is to build a foundation for understanding how model decisions can be interpreted. Gradually, I move toward implementing a standard method (Grad-CAM), and then scale the approach to more complex models and datasets using PyTorch.
# Prefer CUDA, then Apple-Silicon MPS, then CPU.
# Bug fix: the original fell back to "mps" unconditionally, which crashes on
# machines that have neither CUDA nor an MPS backend.
device = torch.device(
    "cuda:0" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
%load_ext autoreload
%autoreload 2
# Basic preprocessing: convert PIL images to float tensors in [0, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
batch_size = 128
# Work on small subsets so the NumPy model trains in reasonable time.
train_subset, _ = random_split(train_dataset, [1000, len(train_dataset) - 1000])
test_subset, _ = random_split(test_dataset, [200, len(test_dataset) - 200])
# Create DataLoader with subset
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
# Bug fix: evaluate on the 200-image test subset created above; previously the
# loader wrapped the full test_dataset and test_subset was dead code.
test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False)
classes = train_dataset.classes
# Collect one example image per class for the gallery below.
samples = {}
for img, label in train_dataset:
    if label not in samples:
        samples[label] = img
    if len(samples) == len(classes):
        break
# Plot the images.
fig, axs = plt.subplots(1, len(classes), figsize=(15, 4))
for label, ax in enumerate(axs):
    img_np = samples[label].permute(1, 2, 0).numpy()
    ax.imshow(img_np, cmap='gray')
    ax.set_title(classes[label])
    ax.axis('off')
plt.tight_layout()
plt.show()
def softmax(x):
    """Row-wise softmax of a (batch, classes) array, max-shifted for stability."""
    shifted = x - x.max(axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / exp_scores.sum(axis=1, keepdims=True)
def l2_regularization(W, reg_strength):
    """Return the L2 penalty reg_strength * sum(W^2) and its gradient 2 * reg_strength * W."""
    penalty = reg_strength * np.sum(W * W)
    penalty_grad = 2.0 * reg_strength * W
    return penalty, penalty_grad
def softmax_with_cross_entropy(predictions, target_index):
    """
    Average cross-entropy over softmax probabilities, plus the gradient
    w.r.t. the raw predictions.

    predictions: (batch, classes) raw scores.
    target_index: correct class id per row.
    """
    n = predictions.shape[0]
    rows = np.arange(n)
    probs = softmax(predictions)
    # Mean negative log-likelihood of the correct classes.
    loss = -np.mean(np.log(probs[rows, target_index]))
    # Softmax+CE gradient: probabilities with 1 subtracted at the targets.
    d_pred = probs.copy()
    d_pred[rows, target_index] -= 1
    return loss, d_pred / n
class Param:
    """A trainable tensor paired with a gradient accumulator of the same shape."""

    def __init__(self, value):
        self.value = value
        # Gradients start at zero and are accumulated by the layers' backward().
        self.grad = np.zeros_like(value)
class ReLULayer:
    """Element-wise ReLU; remembers which inputs were positive for backprop."""

    def __init__(self):
        # Boolean mask of strictly-positive inputs from the last forward call.
        self.mask = None

    def forward(self, X):
        """Zero out all non-positive entries of X."""
        self.mask = X > 0
        return np.where(self.mask, X, 0)

    def backward(self, d_out):
        """Pass gradient through only where the forward input was positive."""
        return d_out * self.mask

    def params(self):
        """ReLU has no trainable parameters."""
        return {}
class FullyConnectedLayer:
    """Affine layer: forward(X) = X @ W + B."""

    def __init__(self, n_input, n_output):
        # Small random init keeps early activations near the linear regime.
        self.W = Param(0.001 * np.random.randn(n_input, n_output))
        self.B = Param(0.001 * np.random.randn(1, n_output))
        # Input cached by forward() — needed to compute d_W in backward().
        self.X = None

    def forward(self, X):
        """Apply the affine transform and cache the input for backprop."""
        self.X = X
        return X @ self.W.value + self.B.value

    def backward(self, d_out):
        """Accumulate parameter gradients; return the gradient w.r.t. X."""
        self.W.grad += self.X.T @ d_out
        self.B.grad += d_out.sum(axis=0, keepdims=True)
        return d_out @ self.W.value.T

    def params(self):
        """Expose trainable parameters by name."""
        return {'W': self.W, 'B': self.B}
import numpy as np
class ConvolutionalLayer:
    """
    2-D convolution (implicit stride 1) over NCHW input, computed per output
    pixel with a flattened-patch matrix multiplication.

    NOTE(review): W is allocated as (filter_size, filter_size, in_channels,
    out_channels) but both passes reshape it to
    (in_channels * filter_size * filter_size, out_channels). That flattens
    W's first three axes in (fs, fs, in_c) order while input patches are
    flattened in (in_c, fs, fs) order. Forward and backward use the same
    reshape, so training is self-consistent, but W's axis labels are
    effectively scrambled — confirm before exporting or reusing the weights.
    """
    def __init__(self, in_channels, out_channels,
                 filter_size, padding):
        # Square kernel of side `filter_size`; symmetric zero padding.
        self.filter_size = filter_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        # He-style initialization scaled by the fan-in of one output unit.
        self.W = Param(
            np.random.randn(filter_size, filter_size, in_channels, out_channels) * np.sqrt(
                2.0 / (filter_size * filter_size * in_channels))
        )
        self.B = Param(np.zeros(out_channels))
        self.padding = padding
        # Caches for backward() and for Grad-CAM (see Model.grad_cam_v3):
        self.last_X = None        # padded input from the last forward pass
        self.feature_maps = None  # copy of the last forward output
        self.gradients = None     # copy of the last d_out seen by backward
    def forward(self, X):
        """Convolve X: (batch, in_ch, H, W) -> (batch, out_ch, H', W')."""
        # get shape of the input tensor
        batch_size, in_channels, height, width = X.shape
        # compute shape of output tensor
        out_height = height - self.filter_size + 1 + 2 * self.padding
        out_width = width - self.filter_size + 1 + 2 * self.padding
        # pad the input tensor
        X = np.pad(X, ((0, 0), (0, 0), (self.padding,) * 2, (self.padding,) * 2))
        # save last input for backward pass
        self.last_X = X
        # create zeros tensor for result
        result = np.zeros((batch_size, self.out_channels, out_height, out_width))
        # reshape weights to use matrix multiplication trick
        weights = self.W.value.reshape(self.in_channels * self.filter_size * self.filter_size, self.out_channels)
        # iterate each pixel in output tensor
        for y in range(out_height):
            for x in range(out_width):
                # take the perception window of the output pixel
                patch = X[:, :, y:y + self.filter_size, x:x + self.filter_size]
                # unwrap patch to use matrix multiplication trick
                patch_flat = patch.reshape(batch_size, self.in_channels * self.filter_size * self.filter_size)
                # convolution operation
                res = patch_flat @ weights
                # add bias to result
                res += self.B.value
                # add pixels to result tensor
                result[:, :, y, x] = res
        # keep a copy of the pre-activation output for Grad-CAM visualization
        self.feature_maps = result.copy()
        return result
    def backward(self, d_out):
        """Return d(input); accumulate d(W) and d(B) from d_out."""
        # keep a copy of the upstream gradient for Grad-CAM channel weighting
        self.gradients = d_out.copy()
        # get shape of last input passed to forward pass
        batch_size, in_channels, height, width = self.last_X.shape
        # get shape of the received gradient
        _, out_channels, out_height, out_width = d_out.shape
        # create d_input with proper shape
        d_X = np.zeros_like(self.last_X)
        # get and reshape weights to use in calculations
        weights = self.W.value.reshape(self.in_channels * self.filter_size * self.filter_size, self.out_channels)
        for y in range(out_height):
            for x in range(out_width):
                # take the gradient patch (batch_size, out_channels)
                gradient_patch = d_out[:, :, y, x]
                # get and reshape input patch to (batch_size, self.in_channels*self.filter_size*self.filter_size)
                input_patch = self.last_X[:, :, y:y + self.filter_size, x:x + self.filter_size]
                input_patch_flat = input_patch.reshape(batch_size,
                                                       self.in_channels * self.filter_size * self.filter_size)
                # d_X: gradient with respect to input -> d_out @ weights^T
                d_input_flat = gradient_patch @ weights.T
                d_input_patch = d_input_flat.reshape(batch_size, in_channels, self.filter_size, self.filter_size)
                d_X[:, :, y:y + self.filter_size, x:x + self.filter_size] += d_input_patch
                # d_w: gradient with respect to weights -> inputs^T @ d_out
                d_flat_w = input_patch_flat.T @ gradient_patch
                d_w = d_flat_w.reshape(self.filter_size, self.filter_size, self.in_channels, self.out_channels)
                self.W.grad += d_w
                # d_b: gradient with respect to bias -> d_out summed over the batch
                d_b = np.sum(gradient_patch, axis=0)
                self.B.grad += d_b
        # return d_X without padding
        return d_X[:, :, self.padding:-self.padding, self.padding:-self.padding] if self.padding > 0 else d_X
    def params(self):
        """Trainable parameters of this layer."""
        return {'W': self.W, 'B': self.B}
class MaxPoolingLayer:
    """Max pooling over square windows of side `pool_size`, stepped by `stride`."""

    def __init__(self, pool_size, stride):
        self.pool_size = pool_size
        self.stride = stride
        # Input cached by forward() so backward() can locate the max positions.
        self.X = None

    def forward(self, X):
        """Return the maximum of each window, per batch item and channel."""
        self.X = X
        batch, ch, h, w = X.shape
        out_h = (h - self.pool_size) // self.stride + 1
        out_w = (w - self.pool_size) // self.stride + 1
        out = np.zeros((batch, ch, out_h, out_w))
        for r in range(out_h):
            r0 = r * self.stride
            for c in range(out_w):
                c0 = c * self.stride
                window = X[:, :, r0:r0 + self.pool_size, c0:c0 + self.pool_size]
                out[:, :, r, c] = window.max(axis=(2, 3))
        return out

    def backward(self, d_out):
        """Route each upstream gradient to the argmax position of its window."""
        batch, ch, h, w = self.X.shape
        d_in = np.zeros_like(self.X)
        out_h = (h - self.pool_size) // self.stride + 1
        out_w = (w - self.pool_size) // self.stride + 1
        # Index grids for fancy-indexing into (batch, channel) positions.
        b_idx = np.arange(batch)[:, None]
        c_idx = np.arange(ch)
        for r in range(out_h):
            r0 = r * self.stride
            for c in range(out_w):
                c0 = c * self.stride
                window = self.X[:, :, r0:r0 + self.pool_size, c0:c0 + self.pool_size]
                flat = window.reshape(batch, ch, -1)
                # First occurrence of the maximum inside each window.
                winner = np.argmax(flat, axis=2)
                win_r, win_c = np.unravel_index(winner, (self.pool_size, self.pool_size))
                d_in[b_idx, c_idx, win_r + r0, win_c + c0] += d_out[:, :, r, c]
        return d_in

    def params(self):
        """Pooling has no trainable parameters."""
        return {}
class Flattener:
    """Reshape (N, C, H, W) activations to (N, C*H*W) and back during backprop."""

    def __init__(self):
        # Original shape remembered by forward() so backward() can undo it.
        self.X_shape = None

    def forward(self, X):
        """Collapse all non-batch dimensions into one."""
        self.X_shape = X.shape
        return X.reshape(X.shape[0], -1)

    def backward(self, d_out):
        """Restore the gradient to the cached 4-D shape."""
        return d_out.reshape(self.X_shape)

    def params(self):
        """No trainable parameters."""
        return {}
Implementing Grad-CAM in Custom NumPy Model¶
In this section I extend my NumPy-based convolutional model with support for Grad-CAM visualizations. Grad-CAM allows us to understand which spatial regions of the input were most important for a model’s decision, by tracing back from output predictions to the internal convolutional features. I implemented three versions of Grad-CAM progressively, starting from basic intuition to the original algorithm.
grad_cam_v1 — Gradient w.r.t. Input¶
In the first version, I compute the gradient of the final logits with respect to the input image. This method is conceptually aligned with the idea of a derivative, which tells us how much a change in an input pixel would affect the final prediction (the logits).
Steps:
- Perform a forward pass to get the prediction logits.
- Backpropagate from the logits to the input.
- Take the absolute value of the input gradients and aggregate over channels to form a heatmap.
- Visualize it alongside the input.
This version produces a sensitivity map, but it lacks localization in deeper features and tends to highlight high-frequency noise.
grad_cam_v2 — Target Class Masking¶
In the second version, I introduce masking to isolate the gradient signal for a specific target class. Rather than backpropagating from all outputs (which mixes gradients from multiple classes), I zero out the logits of all other classes and backpropagate only from the one of interest.
Steps:
- Perform a forward pass to compute logits.
- Create a mask that keeps only the logit of the target class active.
- Backpropagate only from the target logit.
- Compute gradients w.r.t. the input and visualize as in v1.
This approach improves the relevance of the heatmap to the specific class and removes confusion from unrelated outputs.
grad_cam_v3 — Original Grad-CAM¶
The third version follows the original Grad-CAM method more closely. Instead of working with gradients at the input level, we inspect the gradients at a chosen convolutional layer and use them to compute class-specific importance for each feature channel.
Steps:
- Forward pass with capture of the convolutional feature maps.
- Backward pass from the target logit to compute gradients w.r.t. those features.
- Global average pool the gradients to get a weight for each channel.
- Compute a weighted sum of the feature maps and apply ReLU.
- Normalize and upsample to input size for visualization.
class Model:
    """
    Minimal sequential NumPy network with Grad-CAM style explainability.

    Each layer must implement forward(x), backward(grad) and params().
    """

    def __init__(self, layers):
        """Store the ordered list of layers."""
        self.layers = layers
        # Logits from the most recent forward() call.
        self.last_output = None

    def forward(self, x):
        """Run x through every layer in order; cache and return the logits."""
        for layer in self.layers:
            x = layer.forward(x)
        self.last_output = x
        return x

    def backward(self, grad):
        """Propagate grad through the layers in reverse; return d(input)."""
        for layer in reversed(self.layers):
            grad = layer.backward(grad)
        return grad

    # ---- visualization helpers (shared by the three Grad-CAM variants) ----

    @staticmethod
    def _normalize_heatmap(heatmap, threshold):
        """Min-max scale `heatmap` to [0, 1] and zero entries below threshold."""
        low, high = np.min(heatmap), np.max(heatmap)
        heatmap = (heatmap - low) / (high - low + 1e-8)
        return np.where(heatmap < threshold, 0, heatmap)

    @staticmethod
    def _show_triptych(inp_img, heatmap, alpha):
        """Plot overlay / original / heatmap panels for one sample."""
        fig, axs = plt.subplots(1, 3, figsize=(12, 4))
        axs[0].imshow(inp_img)
        axs[0].imshow(heatmap, alpha=alpha)
        axs[0].set_title("Overlay")
        axs[0].axis('off')
        axs[1].imshow(inp_img)
        axs[1].set_title("Original")
        axs[1].axis('off')
        axs[2].imshow(heatmap, cmap='jet')
        axs[2].set_title("Heatmap")
        axs[2].axis('off')
        plt.tight_layout()
        plt.show()

    def grad_cam_v1(self, input_batched, threshold=0.3):
        """
        v1: gradient of ALL logits w.r.t. the input image (saliency map).

        Bug fix: the parameter was previously named `input_bathed`, so the
        body silently read the *global* `input_batched` variable instead of
        its own argument.
        """
        self.forward(input_batched)
        input_grad = self.backward(self.last_output)
        self.clear_params()
        alpha = 0.5
        for inp, mask in zip(input_batched, input_grad):
            # Channel-wise max of |gradient| gives a per-pixel sensitivity map.
            heatmap = self._normalize_heatmap(np.max(np.abs(mask), axis=0), threshold)
            self._show_triptych(inp.detach().cpu().permute(1, 2, 0).numpy(), heatmap, alpha)
        return input_grad

    def grad_cam_v2(self, input_batched, target_class=None, threshold=0):
        """
        v2: like v1, but backpropagates only the target-class logit.

        If target_class is None the predicted class is used (previously the
        None default raised). Also fixes the `input_bathed` name typo.
        """
        outputs = self.forward(input_batched)
        batch_size = input_batched.shape[0]
        if target_class is None:
            target_class = np.argmax(outputs, axis=1)
        # Keep only the target logit so unrelated classes contribute nothing.
        masked_output = np.zeros_like(outputs)
        masked_output[np.arange(batch_size), target_class] = outputs[np.arange(batch_size), target_class]
        input_grad = self.backward(masked_output)
        self.clear_params()
        alpha = 0.5
        for inp, mask in zip(input_batched, input_grad):
            heatmap = self._normalize_heatmap(np.max(np.abs(mask), axis=0), threshold)
            self._show_triptych(inp.detach().cpu().permute(1, 2, 0).numpy(), heatmap, alpha)
        return input_grad

    def grad_cam_v3(self, input_batched, target_class=None, conv_layer_index=-1, threshold=0.0, alpha=0.5):
        """
        v3: original Grad-CAM — weight the chosen conv layer's feature maps
        by the global-average-pooled gradients of the target logit.

        Fixes: negative conv_layer_index values are now resolved against
        len(self.layers) (the old default of -1 never matched any layer),
        and target_class=None falls back to the predicted class.
        """
        if conv_layer_index < 0:
            conv_layer_index += len(self.layers)
        # Forward pass while capturing feature maps from the chosen conv layer.
        feature_maps = None
        target_conv_layer = None
        x = input_batched
        for idx, layer in enumerate(self.layers):
            x = layer.forward(x)
            if idx == conv_layer_index and hasattr(layer, 'feature_maps'):
                feature_maps = layer.feature_maps.copy()  # [B, C, conv_h, conv_w]
                target_conv_layer = layer
        outputs = x
        batch_size = outputs.shape[0]
        if target_class is None:
            target_class = np.argmax(outputs, axis=1)
        target_class = np.array(target_class)
        # Mask outputs so only the target logit contributes to the backward pass.
        masked_output = np.zeros_like(outputs)
        masked_output[np.arange(batch_size), target_class] = outputs[np.arange(batch_size), target_class]
        self.backward(masked_output)
        self.clear_params()
        grads = target_conv_layer.gradients  # [B, C, conv_h, conv_w]
        # Global average pooling of the gradients -> one weight per channel.
        weights = np.mean(grads, axis=(2, 3))  # [B, C]
        heatmaps = []
        for i in range(batch_size):
            # Weighted sum of feature maps; ReLU keeps only positive evidence.
            cam = np.tensordot(weights[i], feature_maps[i], axes=1).astype(np.float32)
            cam = self._normalize_heatmap(np.maximum(cam, 0), threshold)
            heatmaps.append(cam)
        heatmaps = np.array(heatmaps)  # [B, conv_h, conv_w]
        # Upsample each heatmap to the input resolution and display it.
        for inp, cam in zip(input_batched, heatmaps):
            inp_img = inp.detach().cpu().permute(1, 2, 0).numpy()
            upsampled_cam = resize(cam, (inp_img.shape[0], inp_img.shape[1]), mode='reflect', anti_aliasing=True)
            self._show_triptych(inp_img, upsampled_cam, alpha)
        return heatmaps

    def compute_loss_and_grad(self, predictions, target_index):
        """Softmax cross-entropy loss and its gradient w.r.t. predictions."""
        loss, grad = softmax_with_cross_entropy(predictions, target_index)
        return loss, grad

    def params(self):
        """Map 'layer{i}_{name}' -> Param for every trainable parameter."""
        parameters = {}
        for idx, layer in enumerate(self.layers):
            for name, param in layer.params().items():
                parameters[f"layer{idx}_{name}"] = param
        return parameters

    def clear_params(self):
        """Zero all accumulated gradients."""
        for param in self.params().values():
            param.grad = np.zeros_like(param.grad)

    def update_params(self, lr):
        """One vanilla gradient-descent step, then reset the gradients."""
        for param in self.params().values():
            param.value -= lr * param.grad
            param.grad = np.zeros_like(param.grad)

    def train(self, lr, epochs, data_loader):
        """
        Plain SGD training loop.

        data_loader yields (x, y) batches where x has shape
        (batch_size, input_size) and y holds the target class indices.
        """
        loss_list = []
        for epoch in range(epochs):
            epoch_loss = 0.0
            for x, y in data_loader:
                # Forward pass: compute predictions for the current batch.
                predictions = self.forward(x)
                # Loss and output-level gradient.
                loss, grad = self.compute_loss_and_grad(predictions, y)
                epoch_loss += loss
                # Backward pass and parameter update.
                self.backward(grad)
                self.update_params(lr)
            avg_loss = epoch_loss / len(data_loader)
            loss_list.append(avg_loss)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")
        # Plot training loss.
        plt.plot(loss_list)
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Training Loss')
        plt.show()
class MomentumSGD:
    """SGD with classical momentum, tracking one velocity buffer per parameter."""

    def __init__(self, momentum=0.9):
        # Bug fix: the constructor previously ignored `momentum` and
        # hard-coded 0.9, so custom momentum values had no effect.
        self.momentum = momentum
        self.velocities = {}

    def update(self, w, d_w, learning_rate, param_name):
        """
        Return updated weights for `param_name`.

        v <- momentum * v - lr * d_w;  w <- w + v
        """
        if param_name not in self.velocities:
            self.velocities[param_name] = np.zeros_like(w)
        self.velocities[param_name] = self.momentum * self.velocities[param_name] - learning_rate * d_w
        return w + self.velocities[param_name]
def train_model(model, train_loader, test_loader, epochs, lr, optimizer=None, save_path=None):
    """
    Train the NumPy model and evaluate it on the test set after every epoch.

    Args:
        model: Model instance exposing forward/backward/params.
        train_loader, test_loader: iterables of (x, y) torch batches.
        epochs: number of passes over the training data.
        lr: learning rate handed to the optimizer.
        optimizer: object with update(w, d_w, lr, name); a fresh MomentumSGD
            is created when None. (Bug fix: the old mutable default
            `optimizer=MomentumSGD()` shared velocity state across *all*
            calls of this function.)
        save_path: currently unused; kept for interface compatibility.

    Returns:
        dict with per-epoch 'train_loss', 'train_acc', 'test_loss', 'test_acc'.
    """
    if optimizer is None:
        optimizer = MomentumSGD()
    history = {
        'train_loss': [],
        'train_acc': [],
        'test_loss': [],
        'test_acc': []
    }
    for epoch in range(epochs):
        # ---- Training phase ------------------------------------------------
        model.train_mode = True
        epoch_loss = 0.0
        correct_train = 0
        total_train = 0
        batch_count = 0
        print(f"Epoch {epoch + 1}/{epochs}")
        print("-" * 30)
        for x, y in train_loader:
            # Torch tensors -> NumPy arrays for the custom model.
            x = x.numpy().astype(np.float32)
            y = y.numpy()
            # Forward pass
            predictions = model.forward(x)
            # Loss computation
            loss, grad = softmax_with_cross_entropy(predictions, y)
            # Training accuracy on this batch.
            pred_classes = np.argmax(predictions, axis=1)
            correct_train += np.sum(pred_classes == y)
            total_train += len(y)
            # Backward pass
            model.backward(grad)
            # Optimizer step, then reset gradients for every parameter.
            for param_name, param in model.params().items():
                param.value = optimizer.update(param.value, param.grad, lr, param_name)
                param.grad = np.zeros_like(param.grad)
            epoch_loss += loss
            batch_count += 1
        avg_train_loss = epoch_loss / batch_count
        train_accuracy = (correct_train / total_train) * 100
        history['train_loss'].append(avg_train_loss)
        history['train_acc'].append(train_accuracy)
        # ---- Testing phase (forward passes only) ---------------------------
        model.train_mode = False
        test_loss = 0.0
        correct_test = 0
        total_test = 0
        test_batch_count = 0
        for x, y in test_loader:
            x = x.numpy().astype(np.float32)
            y = y.numpy()
            predictions = model.forward(x)
            batch_loss, _ = softmax_with_cross_entropy(predictions, y)
            test_loss += batch_loss
            pred_classes = np.argmax(predictions, axis=1)
            correct_test += np.sum(pred_classes == y)
            total_test += len(y)
            test_batch_count += 1
        avg_test_loss = test_loss / test_batch_count
        test_accuracy = (correct_test / total_test) * 100
        history['test_loss'].append(avg_test_loss)
        history['test_acc'].append(test_accuracy)
        # Per-epoch report.
        print(f"Train Loss: {avg_train_loss:.4f} | Train Acc: {train_accuracy:.2f}%")
        print(f"Test Loss: {avg_test_loss:.4f} | Test Acc: {test_accuracy:.2f}%")
    return history
# Small CNN for 28x28 grayscale MNIST digits, built from the NumPy layers above.
model = Model([
    ConvolutionalLayer(1, 16, 3, 1),  # Input: 1 channel, Output: 16 channels, 3x3 kernel, padding 1
    ReLULayer(),
    ConvolutionalLayer(16, 32, 3, 1),  # 16 -> 32 channels, 3x3 kernel, padding 1
    ReLULayer(),
    MaxPoolingLayer(2, 2),  # 2x2 pooling with stride 2 -> output 32 * 14 * 14
    Flattener(),
    FullyConnectedLayer(32 * 14 * 14, 10),  # 10 logits, one per digit class
])
epochs = 6
learning_rate = 0.001
train_history = train_model(model, train_loader, test_loader, epochs, learning_rate)
Epoch 1/6 ------------------------------ Train Loss: 2.1801 | Train Acc: 25.20% Test Loss: 1.9430 | Test Acc: 45.16% Epoch 2/6 ------------------------------ Train Loss: 1.6803 | Train Acc: 59.80% Test Loss: 1.4002 | Test Acc: 72.31% Epoch 3/6 ------------------------------ Train Loss: 1.1905 | Train Acc: 76.90% Test Loss: 1.0027 | Test Acc: 79.87% Epoch 4/6 ------------------------------ Train Loss: 0.8693 | Train Acc: 82.10% Test Loss: 0.7747 | Test Acc: 82.52% Epoch 5/6 ------------------------------ Train Loss: 0.6829 | Train Acc: 84.10% Test Loss: 0.6389 | Test Acc: 84.64% Epoch 6/6 ------------------------------ Train Loss: 0.5673 | Train Acc: 85.90% Test Loss: 0.5575 | Test Acc: 85.94%
# Run all three Grad-CAM variants on the first few training images.
num_samples = 3
input_batched = torch.stack([train_dataset[i][0] for i in range(num_samples)])
target_class = np.stack([train_dataset[i][1] for i in range(num_samples)])
grad_cam_mask = model.grad_cam_v1(input_batched, threshold=0)[0]
grad_cam_mask = model.grad_cam_v2(input_batched, target_class=target_class, threshold=0)[0]
# conv_layer_index=2 selects the second ConvolutionalLayer in the model list.
grad_cam_mask = model.grad_cam_v3(input_batched, target_class=target_class,conv_layer_index=2, threshold=0)[0]
Results and Reflections¶
In my experience, the simpler custom implementations were more informative, particularly on the MNIST dataset. The more straightforward methods (v1 and v2) gave sharper and more intuitive signals about what the model was actually attending to.
For instance, in the case of the digit zero, we could clearly see that the model responds to the circular outline and the empty middle space. These versions made it possible to interpret the prediction by identifying structural elements like angles and boundaries.
On the other hand, the v3 implementation (original Grad-CAM) appeared more abstract. It resembled a probabilistic mask, indicating which parts of the image contributed positively to the target class. While this gave a good sense of class confidence spread, it often felt more like a soft coloring over the image than a precise signal.
This was especially visible with digits like 0 and 5, where the heatmap highlighted confident regions.
For digit 4, the visualization is inconsistent and somewhat buggy. I didn’t investigate this further.
Cat vs Dog Classification with Grad‑CAM (Using PyTorch)¶
In the second part of the assignment, we apply Grad-CAM to a more realistic dataset with higher-resolution images to evaluate its interpretability in more complex visual scenarios.
We use the Cats vs Dogs dataset from Kaggle. The dataset provides a clearer distinction between classes and more natural image variation compared to datasets like MNIST or CIFAR-10.
To handle these images, we define a simple CNN using PyTorch with adaptive pooling to accommodate the larger input size. Grad-CAM is implemented using forward and backward hooks on a convolutional layer, allowing us to visualize which regions of the image contribute most to the model's predictions.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
Step 1: Load Dataset¶
Download dataset using Kaggle hub
import kagglehub
path = kagglehub.dataset_download("shaunthesheep/microsoft-catsvsdogs-dataset")
print("Path to dataset files:", path)
/home/may33/miniconda3/envs/cnn/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
Path to dataset files: /home/may33/.cache/kagglehub/datasets/shaunthesheep/microsoft-catsvsdogs-dataset/versions/1
import os
# Resize to 224x224 and normalize with ImageNet statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
class CatDogKaggleDataset(Dataset):
    """Folder-based Cats-vs-Dogs dataset: label 0 for Cat images, 1 for Dog."""

    def __init__(self, root, transform=None):
        self.transform = transform
        self.image_files = []
        self.labels = []
        # Each class lives in its own sub-folder; only .jpg files are kept.
        for folder, label in zip(["Cat", "Dog"], [0, 1]):
            folder_path = os.path.join(root, folder)
            jpg_names = [f for f in os.listdir(folder_path) if f.lower().endswith('.jpg')]
            self.image_files.extend(os.path.join(folder_path, f) for f in jpg_names)
            self.labels.extend([label] * len(jpg_names))

    def __len__(self):
        """Number of images found on disk."""
        return len(self.image_files)

    def __getitem__(self, index):
        """Load image `index` as RGB, apply the transform, return (img, label)."""
        path = self.image_files[index]
        image = Image.open(path).convert('RGB')
        label = self.labels[index]
        if self.transform:
            image = self.transform(image)
        return image, label
Step 2: Define the CNN Model¶
Our model consists of:
- Three convolutional layers with ReLU and max pooling.
- An adaptive average pooling layer to fix the output size.
- A fully connected layer that outputs logits for 2 classes.
We choose an adaptive pooling size of (7,7) so that the FC layer's input size is 32 * 7 * 7 = 1568.
class CatVsDogCNN(nn.Module):
    """Three conv/pool blocks, adaptive average pooling, and a 2-class head."""

    def __init__(self):
        super(CatVsDogCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Fixes the spatial size to 7x7 regardless of input resolution.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))
        # 32 channels * 7 * 7 features -> 2 logits (cat, dog).
        self.fc = nn.Linear(32 * 7 * 7, 2)

    def forward(self, x):
        """Return 2-class logits for a batch of RGB images."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        x = self.adaptive_pool(x)
        x = x.flatten(start_dim=1)
        return self.fc(x)
Step 3: Implement Grad‑CAM¶
The GradCAM class registers hooks on a target layer (we use conv3, the last convolutional layer) to capture:
- The forward activations (feature maps).
- The gradients during the backward pass.
The heatmap is computed as a weighted sum of the feature maps (using global average pooling of the gradients as weights), followed by ReLU and normalization.
class GradCAM:
    """
    Grad-CAM for a PyTorch model: hooks a target conv layer to capture its
    activations (forward) and gradients (backward), then builds a heatmap.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        self._register_hooks()

    def _register_hooks(self):
        """Attach the hooks and keep their handles so they can be removed."""
        def forward_hook(module, input, output):
            self.activations = output.detach()

        def backward_hook(module, grad_in, grad_out):
            self.gradients = grad_out[0].detach()

        # Bug fix: register_full_backward_hook replaces the deprecated
        # register_backward_hook, which could deliver wrong gradients for
        # modules with multiple inputs/outputs.
        self._handles = [
            self.target_layer.register_forward_hook(forward_hook),
            self.target_layer.register_full_backward_hook(backward_hook),
        ]

    def remove_hooks(self):
        """Detach the hooks; call when this GradCAM instance is done."""
        for handle in self._handles:
            handle.remove()
        self._handles = []

    def generate(self, input_tensor, target_class=None):
        """
        Run forward + backward and return (heatmap, target_class).

        NOTE: assumes a batch of size 1 — it reads output[0, ...] and uses
        .item() on the argmax.
        """
        output = self.model(input_tensor)
        if target_class is None:
            target_class = output.argmax(dim=1).item()
        self.model.zero_grad()
        score = output[0, target_class]
        score.backward()
        # Global average pooling of the gradients -> one weight per channel.
        weights = self.gradients.mean(dim=[2, 3], keepdim=True)
        # Weighted combination of activation maps; ReLU keeps positive evidence.
        cam = (weights * self.activations).sum(dim=1, keepdim=True)
        cam = F.relu(cam)
        # Min-max normalization to [0, 1].
        cam = cam - cam.min()
        cam = cam / (cam.max() + 1e-8)
        cam = cam.squeeze().cpu().numpy()
        return cam, target_class
Step 4: Prepare Data¶
I use the downloaded Microsoft Cats vs Dogs dataset from Kaggle, which contains a large number of natural images divided into two folders: one for cats and one for dogs. Each class has varying backgrounds, poses, and lighting conditions.
To work with this data:
- We define a custom dataset class that assigns labels based on folder names ("Cat" = 0, "Dog" = 1).
- Images are resized to 224×224 and normalized using ImageNet statistics.
- The dataset is split into 80% training and 20% testing using random_split.
- DataLoaders are constructed for efficient batching and shuffling during training and evaluation.
# Path where kagglehub cached the dataset (machine-specific).
dataset_path = "/home/may33/.cache/kagglehub/datasets/shaunthesheep/microsoft-catsvsdogs-dataset/versions/1/PetImages"
train_dataset = CatDogKaggleDataset(root=dataset_path, transform=transform)
# 80/20 train/test split.
total_size = len(train_dataset)
train_size = int(0.8 * total_size)
val_size = total_size - train_size
train_subset, test_subset = random_split(train_dataset, [train_size, val_size])
batch_size = 256
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=True)
def denormalize(img_tensor):
    """Undo ImageNet normalization; return an HWC NumPy image clipped to [0, 1]."""
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    hwc = img_tensor.squeeze().cpu().permute(1, 2, 0).numpy()
    return np.clip(hwc * std + mean, 0, 1)
print("Total samples:", total_size)
print("Train samples:", len(train_subset))
print("Test samples:", len(test_subset))
Total samples: 24998 Train samples: 19998 Test samples: 5000
# Sanity check: display the first training image after undoing normalization.
input_img = train_dataset[0][0]
img_np = denormalize(input_img)
plt.imshow(img_np)
plt.grid(False)
plt.show()
def _run_train_epoch(model, loader, criterion, optimizer):
    """Run one optimization pass over `loader`.

    Returns:
        tuple: (mean loss over the dataset, accuracy in percent).
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in loader:
        # `device` is a module-level global set at the top of the file.
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        loss.backward()
        optimizer.step()
        # Weight by batch size so the mean is exact even when the last
        # batch is smaller than the others.
        running_loss += loss.item() * images.size(0)
    return running_loss / len(loader.dataset), 100 * correct / total


def _run_eval_epoch(model, loader, criterion):
    """Evaluate `model` on `loader` without gradient tracking.

    Returns:
        tuple: (mean loss over the dataset, accuracy in percent).
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            total_loss += loss.item() * images.size(0)
    return total_loss / len(loader.dataset), 100 * correct / total


def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, scheduler=None):
    """
    Enhanced training function that tracks and reports all key metrics.

    Args:
        model: Neural network model
        train_loader: DataLoader for training data
        test_loader: DataLoader for test data
        criterion: Loss function
        optimizer: Optimization algorithm
        num_epochs: Number of training epochs
        scheduler: Optional learning rate scheduler

    Returns:
        dict: Dictionary containing training history with keys
        'train_loss', 'train_acc', 'test_loss', 'test_acc', each a
        list with one entry per epoch.
    """
    train_losses = []
    train_accuracies = []
    test_losses = []
    test_accuracies = []
    for epoch in range(1, num_epochs + 1):
        # Training phase
        epoch_loss, train_accuracy = _run_train_epoch(model, train_loader, criterion, optimizer)
        train_losses.append(epoch_loss)
        train_accuracies.append(train_accuracy)
        # Evaluation phase
        epoch_test_loss, test_accuracy = _run_eval_epoch(model, test_loader, criterion)
        test_losses.append(epoch_test_loss)
        test_accuracies.append(test_accuracy)
        # Scheduler step: ReduceLROnPlateau needs the monitored metric
        # (test loss); every other scheduler steps unconditionally.
        if scheduler:
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(epoch_test_loss)
            else:
                scheduler.step()
        # Print all metrics
        print(f"Epoch {epoch}/{num_epochs} - "
              f"Train Loss: {epoch_loss:.4f}, Train Acc: {train_accuracy:.2f}%, "
              f"Test Loss: {epoch_test_loss:.4f}, Test Acc: {test_accuracy:.2f}%")
    return {
        'train_loss': train_losses,
        'train_acc': train_accuracies,
        'test_loss': test_losses,
        'test_acc': test_accuracies
    }
# Training configuration and run for the cat-vs-dog CNN.
num_epochs = 20
model = CatVsDogCNN().to(device)
model.train()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# StepLR multiplies the learning rate by 0.2 every 5 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.2)
history = train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, scheduler)
Epoch 1/20 - Train Loss: 0.6237, Train Acc: 64.33%, Test Loss: 0.5992, Test Acc: 67.52% Epoch 2/20 - Train Loss: 0.5484, Train Acc: 72.05%, Test Loss: 0.5184, Test Acc: 74.74% Epoch 3/20 - Train Loss: 0.4914, Train Acc: 76.39%, Test Loss: 0.6587, Test Acc: 67.68% Epoch 4/20 - Train Loss: 0.4701, Train Acc: 78.05%, Test Loss: 0.4904, Test Acc: 76.28% Epoch 5/20 - Train Loss: 0.4404, Train Acc: 79.84%, Test Loss: 0.4647, Test Acc: 79.14% Epoch 6/20 - Train Loss: 0.3995, Train Acc: 82.06%, Test Loss: 0.4157, Test Acc: 81.86% Epoch 7/20 - Train Loss: 0.3875, Train Acc: 82.95%, Test Loss: 0.4102, Test Acc: 82.50% Epoch 8/20 - Train Loss: 0.3818, Train Acc: 83.21%, Test Loss: 0.4052, Test Acc: 82.00% Epoch 9/20 - Train Loss: 0.3747, Train Acc: 83.55%, Test Loss: 0.4047, Test Acc: 83.04% Epoch 10/20 - Train Loss: 0.3669, Train Acc: 83.96%, Test Loss: 0.3968, Test Acc: 83.56% Epoch 11/20 - Train Loss: 0.3596, Train Acc: 84.43%, Test Loss: 0.3886, Test Acc: 83.40% Epoch 12/20 - Train Loss: 0.3557, Train Acc: 84.68%, Test Loss: 0.3877, Test Acc: 83.62% Epoch 13/20 - Train Loss: 0.3550, Train Acc: 84.73%, Test Loss: 0.3879, Test Acc: 83.42% Epoch 14/20 - Train Loss: 0.3533, Train Acc: 84.76%, Test Loss: 0.3857, Test Acc: 83.86% Epoch 15/20 - Train Loss: 0.3510, Train Acc: 85.10%, Test Loss: 0.3839, Test Acc: 83.50% Epoch 16/20 - Train Loss: 0.3488, Train Acc: 85.04%, Test Loss: 0.3836, Test Acc: 83.48% Epoch 17/20 - Train Loss: 0.3482, Train Acc: 85.01%, Test Loss: 0.3841, Test Acc: 83.40% Epoch 18/20 - Train Loss: 0.3481, Train Acc: 85.08%, Test Loss: 0.3837, Test Acc: 83.88% Epoch 19/20 - Train Loss: 0.3478, Train Acc: 85.06%, Test Loss: 0.3829, Test Acc: 83.80% Epoch 20/20 - Train Loss: 0.3474, Train Acc: 85.12%, Test Loss: 0.3827, Test Acc: 83.70%
# Qualitative Grad-CAM inspection on randomly chosen test images.
model.eval()
# Hook the last conv layer; its activations drive the heatmaps.
gradcam = GradCAM(model, target_layer=model.conv3)
num_images = 20
for sample_idx in random.sample(range(len(test_subset)), num_images):
    img, label = test_subset[sample_idx]
    input_img = img.unsqueeze(0).to(device)
    # One heatmap per class: 0 = cat, 1 = dog, upsampled to image size.
    heatmaps = {}
    for cls in (0, 1):
        cam, _ = gradcam.generate(input_img, target_class=cls)
        heatmaps[cls] = resize(cam, (224, 224), mode='reflect', anti_aliasing=True)
    orig_img = denormalize(input_img)
    # Original image plus one overlay panel per class.
    fig, axs = plt.subplots(1, 3, figsize=(16, 4))
    axs[0].imshow(orig_img)
    axs[0].set_title("Original Image")
    panels = [(0, "Grad-CAM: Cat (class 0)"), (1, "Grad-CAM: Dog (class 1)")]
    for ax, (cls, title) in zip(axs[1:], panels):
        ax.imshow(orig_img)
        ax.imshow(heatmaps[cls], cmap='jet', alpha=0.5)
        ax.set_title(title)
    for ax in axs:
        ax.axis('off')
    plt.tight_layout()
    plt.show()
    print(f"Sample idx {sample_idx} | Ground Truth: {label} | Predicted: {model(input_img).argmax(dim=1).item()}")
Sample idx 3502 | Ground Truth: 0 | Predicted: 0
Sample idx 3046 | Ground Truth: 1 | Predicted: 1
Sample idx 4818 | Ground Truth: 1 | Predicted: 1
Sample idx 4177 | Ground Truth: 0 | Predicted: 1
Sample idx 1968 | Ground Truth: 1 | Predicted: 1
Sample idx 2005 | Ground Truth: 0 | Predicted: 0
Sample idx 2103 | Ground Truth: 1 | Predicted: 0
Sample idx 3789 | Ground Truth: 1 | Predicted: 1
Sample idx 4950 | Ground Truth: 1 | Predicted: 1
Sample idx 2626 | Ground Truth: 0 | Predicted: 1
Sample idx 3477 | Ground Truth: 1 | Predicted: 1
Sample idx 4008 | Ground Truth: 0 | Predicted: 0
Sample idx 3934 | Ground Truth: 1 | Predicted: 1
Sample idx 397 | Ground Truth: 1 | Predicted: 0
Sample idx 3605 | Ground Truth: 1 | Predicted: 1
Sample idx 2703 | Ground Truth: 1 | Predicted: 1
Sample idx 524 | Ground Truth: 0 | Predicted: 0
Sample idx 1597 | Ground Truth: 1 | Predicted: 0
Sample idx 2730 | Ground Truth: 0 | Predicted: 0
Sample idx 4507 | Ground Truth: 0 | Predicted: 0
Results¶
It was actually a lot of fun to visually explore these results. Grad-CAM turned out to be a great tool to understand how the model processes images. I ended up adding more and more examples just to see the patterns the model relies on.
Here are some key observations:
Grad-CAM does reveal meaningful regions associated with class decisions. The highlighted areas often correspond to intuitive and domain-specific features.
Cat-specific focus: For cats, the model frequently attends to large whiskers, the shape and position of the eyes (especially in kittens), and the patterns on the forehead and ears.
Dog-specific focus: For dogs, the attention is often drawn to the mouth area (especially when it’s open or the tongue is visible), curled ears, and overall head shape.
Accessory bias: The model often focuses heavily on collars, leashes, and similar accessories—strong indicators of “dog” in this dataset. This occasionally leads to misclassifications, such as labeling a cat with a toy or wearing a collar as a dog.
Contextual cues: Background elements also influence decisions. Dogs are more frequently shown in cages or outdoors (e.g., on grass), and the model seems to learn these patterns as latent features.
Overall, Grad-CAM was useful not only for explaining predictions but also for uncovering dataset biases and model shortcuts.